ext_match <- function(data1, data2, treatment_var, pat_id, pair_id, final_table_labels, bin_vars, con_vars, vars_ordered,
                      bin_outcomes, con_outcomes){

  #### This function assumes you have already read the two datasets in. ####
  #### treatment_var is a 0/1 variable that tells which observations are the controls and which are the cases. ####
  #### pat_id is the actual patient id. ####
  #### pair_id is the pair ID from the match. ####
  #### final_table_labels are the colnames of the final table that will be produced. Should be a character vector. ####
  #### bin_vars are the binary variables that will be presented. ####
  #### con_vars are the continuous variables that will be presented. ####
  #### vars_ordered are all of the binary and continuous variables. The variables that occur in both matches are listed first, and the variables occuring in only the presentation match follow. ####
  
  #### Read in the library that contains the exterior match function. ####
  library(exteriorMatch)
  
  #### Write the McNemar function. ####
  mcnemar_test <- function(data, treatment, comparison, variable, pair_id){
    
    
    test_data_t1 <- data[which(data[,treatment]==1),c(pair_id,variable)]
    test_data_t0 <- data[which(data[,treatment]==0),c(pair_id,variable)]
    
    colnames(test_data_t1)[2] <- c("var_t1")
    colnames(test_data_t0)[2] <- c("var_t0")
    
    test_data <- merge(x=test_data_t1, y=test_data_t0, by=c(pair_id))
    
    mctable <- table(test_data$var_t0, test_data$var_t1)
    test <- mcnemar.test(x=mctable)
    ci <- mcnemar.exact(x=mctable, conf.level=0.95)
    print(mctable)
    
    #### Get rate of binary variable. ####
    
    overall_rate <- mean(data[,variable], na.rm=TRUE)
    t1_rate <- mean(data[which(data[,treatment]==1),variable],na.rm=TRUE)
    t0_rate <- mean(data[which(data[,treatment]==0),variable],na.rm=TRUE)
    
    mcnemar_test_vector <- c(comparison, variable, test$statistic, test$p.value, test$method, ci$estimate, ci$conf.int, overall_rate, t1_rate, t0_rate)
    names(mcnemar_test_vector) <- c("comparison", "Outcome", "statistic (Chi-square)","McNemar p.value","method","Paired Odds Ratio", "Odds Ratio Lower 95%","Odds Ratio Upper 95%", paste("New and Exp.", variable, "Rate", sep=" "), "New Surgeon Rate", "Exp. Surgeon Rate")
    
    
    return(mcnemar_test_vector)
    
  }
  
  
  original_dim_data1 <- dim(data1)[1]
  original_dim_data2 <- dim(data2)[1]
  original_data1 <- data1
  original_data2 <- data2
  
  
  #### CHECK to see if the datasets have the same number of cases. If not, then only keep the intersection of both matched cases. ####
  if(dim(data1)[1] != dim(data2)[1]){
    
    #### Get ids of case patients from both datasets. ####
    data1_ids <- data1[which(data1[,treatment_var]==1),]
    data2_ids <- data2[which(data2[,treatment_var]==1),]
    
    #### Get the patient ids in the first dataset that are not in the second dataset, and the ids that are in the second dataset but not the first. ####
    non_intersect_1 <- setdiff(data1_ids[,pat_id], data2_ids[,pat_id])
    non_intersect_2 <- setdiff(data2_ids[,pat_id], data1_ids[,pat_id])
    
    #### Get the pair_ids for the cases that are to be removed, so that we can also remove their controls. ####
    data1_to_remove <- data1_ids[which(data1_ids[,pat_id] %in% non_intersect_1),]
    data2_to_remove <- data2_ids[which(data2_ids[,pat_id] %in% non_intersect_2),]
    
    data1_to_remove_pair_ids <- data1_to_remove[,pair_id]
    data2_to_remove_pair_ids <- data2_to_remove[,pair_id]
    
    print(data1_to_remove_pair_ids)
    print(data2_to_remove_pair_ids)
    
    #### Remove the cases that are not common to both datasets, along with their matched controls. ####
    data1 <- data1[which(!(data1[,pair_id] %in% data1_to_remove_pair_ids)),]
    data2 <- data2[which(!(data2[,pair_id] %in% data2_to_remove_pair_ids)),]
    
  }
  
  
  
  
 #### For the first dataset, get the control patients that were matched to the case patients. ####
  control_pats_1 <- data1[which(data1[,treatment_var]==0), c(pat_id, pair_id)]
  colnames(control_pats_1)[1] <- "control_id"
  
 #### Get the case patients from the first dataset. ####
  case_pats_1 <- data1[which(data1[,treatment_var]==1), c(pat_id, pair_id)]
  colnames(case_pats_1)[1] <- "case_id"
  
  #### Get the control patients from the second dataset. ####
  control_pats_2 <- data2[which(data2[,treatment_var]==0),c(pat_id, pair_id)]
  colnames(control_pats_2)[1] <- "control_id"
  
  #### Get the case patients from the second dataset. ####
  case_pats_2 <- data2[which(data2[,treatment_var]==1),c(pat_id, pair_id)]
  colnames(case_pats_2)[1] <- "case_id"
  
  
  #### Merge the case and control patients. This will be used to sort the control patients based on the case patient they were matched to. ####
  data1_merge <-  merge(x=control_pats_1, y=case_pats_1, by=c(pair_id))
  data1_merge <- data1_merge[order(data1_merge[,c("case_id")]),]
  
  data2_merge <- merge(x=control_pats_2, y=case_pats_2, by=c(pair_id))
  data2_merge <- data2_merge[order(data2_merge[,c("case_id")]),]
  
  #### Get a vector of only the control ids. ####
  control_vec_1 <- subset(data1_merge, select=c("control_id"))
  control_vec_1 <- control_vec_1[,1]
  
  control_vec_2 <- subset(data2_merge, select=c("control_id"))
  control_vec_2 <- control_vec_2[,1]
  
  
  #### Do the Exterior Match. ####
  
  ext_match <- exterior(control_vec_1, control_vec_2)
  ext_pair_id <- c(1:length(ext_match$id1))
  
  control_group_1 <- as.data.frame(cbind(ext_match$id1, ext_pair_id))
  control_group_2 <- as.data.frame(cbind(ext_match$id2, ext_pair_id))
  colnames(control_group_1)[1] <- pat_id
  colnames(control_group_2)[1] <- pat_id
  
  
  #### Get the controls and all of their variables and put them into datasets. ####
  
  controls_1 <- merge(x=data1, y=control_group_1, by=c(pat_id))
  controls_2 <- merge(x=data2, y=control_group_2, by=c(pat_id))
  
  
  #### Produce output table. ####
  final_table <- matrix(0, nrow=(length(vars_ordered)+1), ncol=6)
  
  #### Get all five groups (Case, controls from match 1 before ext match, controls from match 2 before ext match, controls from match 1 after ext match and controls from match 2 after ext match). ####
  case_final <- original_data1[which(original_data1[,treatment_var]==1),]
  controls_final_before_1 <- original_data1[which(original_data1[,treatment_var]==0),]
  controls_final_before_2 <- original_data2[which(original_data2[,treatment_var]==0),]
  controls_final_after_1 <- data1[which(data1[,pat_id] %in% control_group_1[,pat_id]),]
  controls_final_after_2 <- data2[which(data2[,pat_id] %in% control_group_2[,pat_id]),]
  print(dim(controls_final_after_1))
  
  controls_final_after_1$which_match <- 0
  controls_final_after_2$which_match <- 1
  controls_final_after_1_2 <- rbind.fill(controls_final_after_1, controls_final_after_2)

  #### Create table. ####
  
  for(i in 1:length(vars_ordered)){
    
    print(vars_ordered[i])
    
    if(vars_ordered[i] %in% con_vars){
       
      if(!(vars_ordered[i] %in% con_outcomes)){ #### Do tests for continuous variables
      
        
      final_table[i+1,] <- c(mean(case_final[,vars_ordered[i]], na.rm=TRUE), mean(controls_final_before_1[,vars_ordered[i]], na.rm=TRUE), mean(controls_final_before_2[,vars_ordered[i]], na.rm=TRUE),
                           mean(controls_final_after_1[,vars_ordered[i]], na.rm=TRUE), mean(controls_final_after_2[,vars_ordered[i]], na.rm=TRUE),
                           wilcox.test(x=controls_final_after_1[,vars_ordered[i]], y=controls_final_after_2[,vars_ordered[i]], conf.level=0.95)$p.value)
      
      } else{ #### Do test for continuous outcomes.
        
        
        final_table[i+1,] <- c(mean(case_final[,vars_ordered[i]], na.rm=TRUE), mean(controls_final_before_1[,vars_ordered[i]], na.rm=TRUE), mean(controls_final_before_2[,vars_ordered[i]], na.rm=TRUE),
                               mean(controls_final_after_1[,vars_ordered[i]], na.rm=TRUE), mean(controls_final_after_2[,vars_ordered[i]], na.rm=TRUE),
                               wilcox.test(x=controls_final_after_1[,vars_ordered[i]], y=controls_final_after_2[,vars_ordered[i]], conf.level=0.95)$p.value)
        
      }
      
    } else{ #### If variable/outcome is binary.
      
      
      if(!(vars_ordered[i] %in% con_outcomes)){ #### Do tests on binary variables.
        
        temp_table <- table(controls_final_after_1_2[,"which_match"], controls_final_after_1_2[,vars_ordered[i]])
        if(nrow(temp_table)!=2 | ncol(temp_table)!=2){
          
          temp_fisher <- ""
          
        } else{
          temp_fisher <- fisher.test(x=controls_final_after_1_2[,"which_match"], y=controls_final_after_1_2[, vars_ordered[i]])$p.value
        }
        
      final_table[i+1,] <- c((sum(case_final[,vars_ordered[i]], na.rm=T)/nrow(case_final))*100,
                           (sum(controls_final_before_1[,vars_ordered[i]], na.rm=T)/nrow(controls_final_before_1))*100,
                           (sum(controls_final_before_2[,vars_ordered[i]], na.rm=T)/nrow(controls_final_before_2))*100,
                           (sum(controls_final_after_1[,vars_ordered[i]], na.rm=T)/nrow(controls_final_after_1))*100,
                           (sum(controls_final_after_2[,vars_ordered[i]], na.rm=T)/nrow(controls_final_after_2))*100,
                          temp_fisher)
      
      } else { #### Do tests on binary outcomes. 
        
        temp_table <- table(controls_final_after_1_2[,"which_match"], controls_final_after_1_2[,vars_ordered[i]])
        if(nrow(temp_table)!=2 | ncol(temp_table)!=2){
          
          temp_fisher <- ""
          
        } else{
          temp_fisher <- fisher.test(x=controls_final_after_1_2[,"which_match"], y=controls_final_after_1_2[, vars_ordered[i]])$p.value
        }
        
        final_table[i+1,] <- c((sum(case_final[,vars_ordered[i]], na.rm=T)/nrow(case_final))*100,
                               (sum(controls_final_before_1[,vars_ordered[i]], na.rm=T)/nrow(controls_final_before_1))*100,
                               (sum(controls_final_before_2[,vars_ordered[i]], na.rm=T)/nrow(controls_final_before_2))*100,
                               (sum(controls_final_after_1[,vars_ordered[i]], na.rm=T)/nrow(controls_final_after_1))*100,
                               (sum(controls_final_after_2[,vars_ordered[i]], na.rm=T)/nrow(controls_final_after_2))*100,
                               fisher.test(x=controls_final_after_1_2[,"which_match"], y=controls_final_after_1_2[, vars_ordered[i]])$p.value)
        
      }
      
    }
    
  }
  
  numb_vec <- c(original_dim_data1/2, original_dim_data1/2, original_dim_data2/2, dim(controls_final_after_1)[1], dim(controls_final_after_2)[1], "")
  final_table[1,] <- numb_vec
  
  vars_ordered <- c("number", vars_ordered)
  rownames(final_table) <- c(vars_ordered)
  colnames(final_table) <- final_table_labels
  # round(final_table,4)
  
  return(final_table)
  
}